# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""List of datasets with their sample splits."""


# Maps each dataset configuration name to its named sample splits.
#
# Every split value is a slice expression over the source 'train' split, given
# as absolute indices, indices counted from the end (negative), or percentages.
# NOTE(review): the strings look like TFDS split-slicing specs; confirm against
# the loader that consumes them.
DATASETS = {
    # Percentage-based train/val split with a fixed-size tail as test.
    'higgs': {
        'train': 'train[:75%]',
        'val': 'train[75%:95%]',
        'test': 'train[-500000:]',  # a little less than 5%
    },
    # First 1000000 examples split into train/val, same tail as test.
    'higgs1M': {
        'train': 'train[:980000]',
        'val': 'train[980000:1000000]',
        'test': 'train[-500000:]',
    },
    # First 100000 examples split into train/val, plus extra pretext and
    # imputation ranges taken from later in the source.
    'higgs100k': {
        'train': 'train[:98000]',
        'val': 'train[98000:100000]',
        'test': 'train[-500000:]',
        'pretext_validation': 'train[100000:110000]',
        'imputation_train': 'train[110000:135000]',
        'imputation_val': 'train[200000:225000]',
    },
    # higgs100k{1,10,20}p: 'pretext' is the first 98000 examples, and 'train'
    # is the last 1% / 10% / 20% of that same range (980 / 9800 / 19600).
    'higgs100k1p': {
        'pretext': 'train[:98000]',
        'train': 'train[97020:98000]',
        'pretext_validation': 'train[100000:110000]',
        'val': 'train[98000:100000]',
        'test': 'train[-500000:]',
        'imputation_train': 'train[110000:135000]',
        'imputation_val': 'train[200000:225000]',
    },
    'higgs100k10p': {
        'pretext': 'train[:98000]',
        'train': 'train[88200:98000]',
        'pretext_validation': 'train[100000:110000]',
        'val': 'train[98000:100000]',
        'test': 'train[-500000:]',
        'imputation_train': 'train[110000:135000]',
        'imputation_val': 'train[200000:225000]',
    },
    'higgs100k20p': {
        'pretext': 'train[:98000]',
        'train': 'train[78400:98000]',
        'pretext_validation': 'train[100000:110000]',
        'val': 'train[98000:100000]',
        'test': 'train[-500000:]',
        'imputation_train': 'train[110000:135000]',
        'imputation_val': 'train[200000:225000]',
    },
    # Five consecutive, non-overlapping ranges within the first 85000 examples.
    'higgs50k': {
        'pretext': 'train[:50000]',
        'pretext_validation': 'train[50000:75000]',
        'train': 'train[75000:80000]',
        'val': 'train[80000:82500]',
        'test': 'train[82500:85000]',
    },
    # 10000-example 'train' inside a 10000000-example 'pretext' range; 'val'
    # and 'pretext_validation' are the same slice.
    'higgs10k': {
        'pretext': 'train[:10000000]',
        'train': 'train[:10000]',
        'val': 'train[10000000:10500000]',
        'pretext_validation': 'train[10000000:10500000]',
        'test': 'train[-500000:]',
    },
    # higgspre<N>: only the 'pretext' size varies (10k .. 2560k); the 'train',
    # 'val', 'test', pretext-validation and imputation slices are identical.
    'higgspre10k': {
        'pretext': 'train[:10000]',
        'test': 'train[-500000:]',
        'train': 'train[:10000]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
        'imputation_train': 'train[-530000:-520000]',
        'imputation_val': 'train[-520000:-510000]',
    },
    'higgspre40k': {
        'pretext': 'train[:40000]',
        'test': 'train[-500000:]',
        'train': 'train[:10000]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
        'imputation_train': 'train[-530000:-520000]',
        'imputation_val': 'train[-520000:-510000]',
    },
    'higgspre160k': {
        'pretext': 'train[:160000]',
        'test': 'train[-500000:]',
        'train': 'train[:10000]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
        'imputation_train': 'train[-530000:-520000]',
        'imputation_val': 'train[-520000:-510000]',
    },
    'higgspre640k': {
        'pretext': 'train[:640000]',
        'test': 'train[-500000:]',
        'train': 'train[:10000]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
        'imputation_train': 'train[-530000:-520000]',
        'imputation_val': 'train[-520000:-510000]',
    },
    'higgspre2560k': {
        'pretext': 'train[:2560000]',
        'test': 'train[-500000:]',
        'train': 'train[:10000]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
        'imputation_train': 'train[-530000:-520000]',
        'imputation_val': 'train[-520000:-510000]',
    },
    # higgspre640ksup<K>: 640000-example 'pretext' with a 'train' slice of only
    # the first 100 / 500 / 1000 examples; no imputation splits are defined.
    'higgspre640ksup100': {
        'pretext': 'train[:640000]',
        'test': 'train[-500000:]',
        'train': 'train[:100]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
    },
    'higgspre640ksup500': {
        'pretext': 'train[:640000]',
        'test': 'train[-500000:]',
        'train': 'train[:500]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
    },
    'higgspre640ksup1k': {
        'pretext': 'train[:640000]',
        'test': 'train[-500000:]',
        'train': 'train[:1000]',
        'val': 'train[10000:20000]',
        'pretext_validation': 'train[-510000:-500000]',
    },
}

# Number of target classes for each base dataset name.
# NOTE(review): only base names are listed; DATASETS variants such as
# 'higgs1M' have no entry here, so callers presumably look up by base name.
# Confirm against the call sites.
DATASET_TO_NUM_CLASSES = {
    'higgs': 2,
    'covtype': 7,
    'mnist': 10,
    'adult': 2,
}
